dat %>%
ggplot(aes(year, lakeid, fill=is.na(daynum))) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~metric, nrow=6)
# labs(fill = "Missing") +
# scale_fill_scico(palette = 'imola')
# dat %>%
# ggplot(aes(year, lakeid, fill=daynum)) +
# geom_tile(color="grey") +
# theme_bw() +
# facet_wrap(~metric, nrow=6) +
# # labs(fill = "Missing") +
# scale_fill_viridis_c()
Remaining data issues:
Predict ice-on in northern lakes from other lakes ice-on dates
n_iceoff_wide = dat %>%
select(-sampledate)%>%
filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR") & metric == "iceoff") %>%
pivot_wider(names_from = lakeid, values_from = daynum)
al_iceoff_model = lm(AL~BM+CB+CR+SP+TB+TR, data=n_iceoff_wide)
summary(al_iceoff_model)
##
## Call:
## lm(formula = AL ~ BM + CB + CR + SP + TB + TR, data = n_iceoff_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3721 -0.8451 0.1265 0.9507 2.7265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.10967 3.27070 -0.951 0.3496
## BM -0.03295 0.18851 -0.175 0.8624
## CB 0.24815 0.12648 1.962 0.0594 .
## CR 0.20333 0.17076 1.191 0.2434
## SP 0.31430 0.20384 1.542 0.1339
## TB 0.19500 0.10280 1.897 0.0678 .
## TR 0.09211 0.13278 0.694 0.4934
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.779 on 29 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9793, Adjusted R-squared: 0.975
## F-statistic: 228.9 on 6 and 29 DF, p-value: < 2.2e-16
plot(n_iceoff_wide$AL, predict(al_iceoff_model, n_iceoff_wide), pch=16)
# actually looks good; use to fill
cb_iceoff_model = lm(CB~AL+BM+CR+SP+TB+TR, data=n_iceoff_wide)
summary(cb_iceoff_model)
##
## Call:
## lm(formula = CB ~ AL + BM + CR + SP + TB + TR, data = n_iceoff_wide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3604 -0.8546 0.0970 1.5519 3.4296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.02887 4.54691 0.666 0.5106
## AL 0.47220 0.24068 1.962 0.0594 .
## BM 0.04037 0.26007 0.155 0.8777
## CR 0.46368 0.22535 2.058 0.0487 *
## SP 0.04008 0.29239 0.137 0.8919
## TB 0.08250 0.14956 0.552 0.5854
## TR -0.14222 0.18277 -0.778 0.4428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.455 on 29 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.9584, Adjusted R-squared: 0.9498
## F-statistic: 111.5 on 6 and 29 DF, p-value: < 2.2e-16
plot(n_iceoff_wide$CB, predict(cb_iceoff_model, n_iceoff_wide), pch=16)
# really good; go ahead and fill
al_missing_iceoff = n_iceoff_wide %>% filter(is.na(AL))
al_missing_iceoff$iceoff_fill = round(predict(al_iceoff_model, al_missing_iceoff))
al_iceoff_fill = al_missing_iceoff %>%
select(metric, year, iceoff_fill) %>%
rename(daynum_fill = iceoff_fill) %>%
mutate(metric = "iceoff", lakeid = "AL")
cb_missing_iceoff = n_iceoff_wide %>% filter(is.na(CB))
cb_missing_iceoff$iceoff_fill = round(predict(cb_iceoff_model, cb_missing_iceoff))
cb_iceoff_fill = cb_missing_iceoff %>%
select(metric, year, iceoff_fill) %>%
rename(daynum_fill = iceoff_fill) %>%
mutate(metric = "iceoff", lakeid = "CB")
iceoff_fill = bind_rows(al_iceoff_fill, cb_iceoff_fill)
Predict DOC daynum from other variable daynum’s in each northern lake
no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
n_lakes_wide = left_join(no_dups, dat) %>%
select(-sampledate) %>%
pivot_wider(names_from = metric, values_from = daynum) %>%
filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR"))
## Joining, by = c("lakeid", "year", "metric")
hold_data = na.omit(data.frame(n_lakes_wide %>% ungroup() %>% select(-year, -N))) # , -doc_predict
all = lm(doc_epiMax~(iceon+straton+stratoff+energy+
stability+anoxia_summer+
secchi_openwater_max+total_zoop_biomass+daphnia_biomass)*lakeid, data=hold_data)
i_o = lm(doc_epiMax~1, data=hold_data)
hold_step = step(i_o, scope=formula(all))
## Start: AIC=1747.95
## doc_epiMax ~ 1
##
## Df Sum of Sq RSS AIC
## + lakeid 6 74514 394308 1720.3
## + stratoff 1 15366 453457 1742.3
## + total_zoop_biomass 1 10343 458479 1744.8
## + daphnia_biomass 1 4684 464138 1747.7
## <none> 468822 1748.0
## + secchi_openwater_max 1 1158 467664 1749.4
## + iceon 1 663 468159 1749.6
## + straton 1 526 468297 1749.7
## + anoxia_summer 1 477 468345 1749.7
## + stability 1 213 468609 1749.8
## + energy 1 62 468760 1749.9
##
## Step: AIC=1720.32
## doc_epiMax ~ lakeid
##
## Df Sum of Sq RSS AIC
## + stratoff 1 6463 387845 1718.5
## + daphnia_biomass 1 4607 389700 1719.6
## + total_zoop_biomass 1 3796 390512 1720.1
## + energy 1 3779 390529 1720.1
## <none> 394308 1720.3
## + anoxia_summer 1 2246 392062 1721.0
## + iceon 1 1797 392511 1721.3
## + straton 1 132 394176 1722.2
## + secchi_openwater_max 1 27 394281 1722.3
## + stability 1 14 394294 1722.3
## - lakeid 6 74514 468822 1748.0
##
## Step: AIC=1718.53
## doc_epiMax ~ lakeid + stratoff
##
## Df Sum of Sq RSS AIC
## + energy 1 4580 383265 1717.8
## + daphnia_biomass 1 3963 383881 1718.2
## + total_zoop_biomass 1 3796 384049 1718.3
## <none> 387845 1718.5
## + iceon 1 2961 384884 1718.8
## + anoxia_summer 1 2474 385371 1719.1
## - stratoff 1 6463 394308 1720.3
## + straton 1 221 387623 1720.4
## + stability 1 86 387758 1720.5
## + secchi_openwater_max 1 7 387838 1720.5
## + stratoff:lakeid 6 11324 376521 1723.8
## - lakeid 6 65612 453457 1742.3
##
## Step: AIC=1717.81
## doc_epiMax ~ lakeid + stratoff + energy
##
## Df Sum of Sq RSS AIC
## + daphnia_biomass 1 4970 378295 1716.8
## + total_zoop_biomass 1 4401 378864 1717.2
## + iceon 1 4116 379149 1717.3
## <none> 383265 1717.8
## + anoxia_summer 1 2431 380833 1718.3
## - energy 1 4580 387845 1718.5
## + straton 1 1240 382025 1719.1
## + stability 1 344 382921 1719.6
## + secchi_openwater_max 1 49 383215 1719.8
## - stratoff 1 7264 390529 1720.1
## + stratoff:lakeid 6 10575 372690 1723.4
## + energy:lakeid 6 1479 381786 1728.9
## - lakeid 6 69453 452718 1744.0
##
## Step: AIC=1716.82
## doc_epiMax ~ lakeid + stratoff + energy + daphnia_biomass
##
## Df Sum of Sq RSS AIC
## + iceon 1 3501 374794 1716.7
## <none> 378295 1716.8
## + anoxia_summer 1 2232 376063 1717.5
## - daphnia_biomass 1 4970 383265 1717.8
## + total_zoop_biomass 1 1335 376960 1718.0
## + straton 1 1071 377223 1718.2
## - energy 1 5586 383881 1718.2
## + stability 1 174 378121 1718.7
## - stratoff 1 6597 384892 1718.8
## + secchi_openwater_max 1 58 378237 1718.8
## + stratoff:lakeid 6 11129 367165 1722.0
## + daphnia_biomass:lakeid 6 5445 372850 1725.5
## + energy:lakeid 6 2077 376218 1727.6
## - lakeid 6 69095 447390 1743.2
##
## Step: AIC=1716.69
## doc_epiMax ~ lakeid + stratoff + energy + daphnia_biomass + iceon
##
## Df Sum of Sq RSS AIC
## <none> 374794 1716.7
## - iceon 1 3501 378295 1716.8
## - daphnia_biomass 1 4355 379149 1717.3
## + anoxia_summer 1 1404 373390 1717.8
## + total_zoop_biomass 1 1334 373460 1717.9
## + straton 1 920 373873 1718.1
## + stability 1 205 374589 1718.6
## + secchi_openwater_max 1 45 374748 1718.7
## - energy 1 6670 381463 1718.7
## - stratoff 1 8015 382808 1719.5
## + stratoff:lakeid 6 10348 364446 1722.3
## + iceon:lakeid 6 8385 366409 1723.5
## + daphnia_biomass:lakeid 6 4546 370248 1725.9
## + energy:lakeid 6 2281 372512 1727.3
## - lakeid 6 68233 443026 1743.0
doc_model = lm(doc_epiMax~(iceon+straton+stratoff+energy+
stability+anoxia_summer+
secchi_openwater_max+total_zoop_biomass+daphnia_biomass)*lakeid,
data=n_lakes_wide,
na.action = na.exclude)
summary(doc_model)
##
## Call:
## lm(formula = doc_epiMax ~ (iceon + straton + stratoff + energy +
## stability + anoxia_summer + secchi_openwater_max + total_zoop_biomass +
## daphnia_biomass) * lakeid, data = n_lakes_wide, na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -141.770 -17.901 -0.181 18.898 108.897
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.278e+02 1.526e+02 2.148 0.03314 *
## iceon -5.510e-01 3.258e-01 -1.691 0.09267 .
## straton 2.033e-02 3.200e-01 0.064 0.94941
## stratoff 6.192e-01 3.397e-01 1.823 0.07017 .
## energy -4.519e-01 2.277e-01 -1.985 0.04882 *
## stability -1.468e-02 1.493e-01 -0.098 0.92178
## anoxia_summer -5.953e-02 1.682e-01 -0.354 0.72392
## secchi_openwater_max -8.503e-02 8.359e-02 -1.017 0.31053
## total_zoop_biomass 3.360e-02 6.958e-02 0.483 0.62981
## daphnia_biomass 1.050e-01 7.661e-02 1.370 0.17241
## lakeid.L -6.303e+02 4.229e+02 -1.490 0.13803
## lakeid.Q 5.458e+01 4.027e+02 0.136 0.89235
## lakeid.C 4.539e+02 4.127e+02 1.100 0.27304
## lakeid^4 -1.095e+02 4.081e+02 -0.268 0.78880
## lakeid^5 -8.383e+01 4.023e+02 -0.208 0.83517
## lakeid^6 -2.015e+02 3.718e+02 -0.542 0.58865
## iceon:lakeid.L 8.564e-01 8.399e-01 1.020 0.30942
## iceon:lakeid.Q 6.180e-01 8.622e-01 0.717 0.47456
## iceon:lakeid.C -5.787e-01 8.657e-01 -0.668 0.50477
## iceon:lakeid^4 5.088e-01 8.240e-01 0.617 0.53776
## iceon:lakeid^5 1.452e-01 8.907e-01 0.163 0.87071
## iceon:lakeid^6 1.563e+00 8.875e-01 1.761 0.08007 .
## straton:lakeid.L -1.351e-01 8.442e-01 -0.160 0.87309
## straton:lakeid.Q -6.182e-02 8.174e-01 -0.076 0.93980
## straton:lakeid.C -3.454e-02 8.578e-01 -0.040 0.96793
## straton:lakeid^4 1.203e+00 8.626e-01 1.395 0.16496
## straton:lakeid^5 1.096e+00 8.712e-01 1.258 0.21021
## straton:lakeid^6 4.844e-01 8.248e-01 0.587 0.55782
## stratoff:lakeid.L 5.681e-01 9.167e-01 0.620 0.53634
## stratoff:lakeid.Q -4.552e-01 8.073e-01 -0.564 0.57368
## stratoff:lakeid.C -2.335e-01 9.276e-01 -0.252 0.80157
## stratoff:lakeid^4 -7.509e-01 9.877e-01 -0.760 0.44820
## stratoff:lakeid^5 2.994e-01 9.383e-01 0.319 0.75005
## stratoff:lakeid^6 -1.898e+00 7.990e-01 -2.376 0.01865 *
## energy:lakeid.L 4.598e-02 6.108e-01 0.075 0.94008
## energy:lakeid.Q -2.602e-01 5.772e-01 -0.451 0.65272
## energy:lakeid.C -1.765e-02 6.038e-01 -0.029 0.97671
## energy:lakeid^4 1.434e-01 6.484e-01 0.221 0.82521
## energy:lakeid^5 -3.331e-01 5.968e-01 -0.558 0.57749
## energy:lakeid^6 1.740e-01 5.746e-01 0.303 0.76238
## stability:lakeid.L 1.712e-01 3.825e-01 0.448 0.65505
## stability:lakeid.Q 1.750e-01 4.023e-01 0.435 0.66416
## stability:lakeid.C -2.472e-01 4.025e-01 -0.614 0.53997
## stability:lakeid^4 -3.813e-01 3.455e-01 -1.103 0.27146
## stability:lakeid^5 1.388e-01 4.214e-01 0.329 0.74238
## stability:lakeid^6 3.208e-02 4.108e-01 0.078 0.93786
## anoxia_summer:lakeid.L 1.323e-02 5.044e-01 0.026 0.97911
## anoxia_summer:lakeid.Q 8.017e-02 4.902e-01 0.164 0.87029
## anoxia_summer:lakeid.C -1.146e-01 4.358e-01 -0.263 0.79294
## anoxia_summer:lakeid^4 -7.539e-03 4.701e-01 -0.016 0.98723
## anoxia_summer:lakeid^5 -5.039e-01 3.541e-01 -1.423 0.15655
## anoxia_summer:lakeid^6 3.292e-01 3.973e-01 0.829 0.40852
## secchi_openwater_max:lakeid.L 6.604e-01 2.400e-01 2.752 0.00658 **
## secchi_openwater_max:lakeid.Q -4.356e-01 2.357e-01 -1.848 0.06642 .
## secchi_openwater_max:lakeid.C -3.006e-01 2.287e-01 -1.315 0.19046
## secchi_openwater_max:lakeid^4 1.969e-01 2.017e-01 0.976 0.33036
## secchi_openwater_max:lakeid^5 -3.331e-01 2.168e-01 -1.536 0.12644
## secchi_openwater_max:lakeid^6 1.143e-01 2.007e-01 0.569 0.56986
## total_zoop_biomass:lakeid.L 1.146e-01 1.798e-01 0.638 0.52462
## total_zoop_biomass:lakeid.Q -2.267e-01 1.896e-01 -1.196 0.23347
## total_zoop_biomass:lakeid.C -3.778e-01 1.868e-01 -2.023 0.04470 *
## total_zoop_biomass:lakeid^4 -2.103e-02 1.615e-01 -0.130 0.89651
## total_zoop_biomass:lakeid^5 -1.744e-01 1.935e-01 -0.901 0.36874
## total_zoop_biomass:lakeid^6 1.050e-01 1.914e-01 0.548 0.58412
## daphnia_biomass:lakeid.L -1.785e-02 2.143e-01 -0.083 0.93373
## daphnia_biomass:lakeid.Q 7.194e-02 2.108e-01 0.341 0.73335
## daphnia_biomass:lakeid.C 2.495e-01 2.037e-01 1.225 0.22246
## daphnia_biomass:lakeid^4 2.395e-02 2.018e-01 0.119 0.90569
## daphnia_biomass:lakeid^5 2.480e-01 1.925e-01 1.288 0.19948
## daphnia_biomass:lakeid^6 -1.686e-01 1.919e-01 -0.879 0.38079
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 41.73 on 165 degrees of freedom
## (38 observations deleted due to missingness)
## Multiple R-squared: 0.389, Adjusted R-squared: 0.1334
## F-statistic: 1.522 on 69 and 165 DF, p-value: 0.01581
n_lakes_wide$doc_predict = predict(doc_model, n_lakes_wide)
cor = n_lakes_wide %>%
group_by(lakeid) %>%
summarise(r = paste("r =", round(cor(doc_epiMax, doc_predict, use="complete.obs"), 3)))
n_lakes_wide %>%
ggplot(aes(x=doc_epiMax, y=doc_predict, color=as.character(lakeid))) +
geom_point()+
theme_bw() +
labs(color="lakeid")+
geom_abline(slope=1, intercept = 0) +
facet_wrap(~lakeid) +
geom_text(aes(label=r), data=cor, x=300, y=0) +
labs(x="obs peak DOC (daynum)", y="modeled peak DOC (daynum)")
## Warning: Removed 38 rows containing missing values (`geom_point()`).
# not good but vaguely positive relationship; use this instead of median?
missing_doc = n_lakes_wide %>% filter(is.na(doc_epiMax))
missing_doc$doc_fill = round(predict(doc_model, missing_doc))
doc_fill = missing_doc %>%
select(lakeid, year, doc_fill) %>%
rename(daynum_fill = doc_fill) %>%
mutate(metric = "doc_epiMax") %>%
filter(year < 2000)
**Using dates from bad/uncalibrated chl data
filled_chl_dates = read_csv("Data/derived/chl_filled_peaks.csv") %>%
rename(daynum_fill = daynum) %>%
select(-sampledate)
## Rows: 36 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): lakeid, metric
## dbl (2): year, daynum
## date (1): sampledate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_fill = bind_rows(iceoff_fill, doc_fill, filled_chl_dates)
dat_filled = full_join(dat, all_fill) %>%
mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, FALSE)) %>%
mutate(daynum_fill = ifelse(is.na(daynum) & !is.na(daynum_fill), daynum_fill, daynum))
## Joining, by = c("lakeid", "year", "metric")
dat_filled$lakeid = factor(dat_filled$lakeid,
levels = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"),
ordered = T)
vars = c("iceoff", "straton", "secchi_all", "secchi_openwater_max","secchi_openwater_min", "daphnia_biomass","zoopDensity",
"total_zoop_biomass", "chlor_all", "chlor_spring", "chlor_fall", "doc_epiMax",
"totpuf_epiMax", "totpuf_epiMin", "totpuf_hypoMax", "totpuf_hypoMin",
"anoxia", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
dat_filled$metric = factor(dat_filled$metric,
levels = rev(vars),
ordered=T)
dat_filled %>%
ggplot(aes(year, metric, fill=is.na(daynum_fill))) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6) # +
# labs(fill = "Missing") +
# scale_fill_viridis_c()
Trim to “good” years, fill FI ice data with Monona, then fill additional missing with median values for each lake/metric
df_yearsWant = dat_filled %>%
filter((lakeid %in% c("FI", "ME", "MO", "WI") & year %in% 1996:2018) |
(!(lakeid %in% c("FI", "ME", "MO", "WI")) & year %in% 1982:2018))
fi_icefill = df_yearsWant %>%
filter(lakeid == "MO" & metric %in% c("iceoff", "iceon")) %>%
mutate(lakeid = "FI") %>%
select(-daynum, -filled_flag, -sampledate) %>%
rename(icefill = daynum_fill)
df_yearsWant = df_yearsWant %>%
full_join(fi_icefill) %>%
mutate(daynum_fill = ifelse(is.na(daynum_fill) & !is.na(icefill),
icefill, daynum),
filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill) & !is.na(icefill),
TRUE, filled_flag))
## Joining, by = c("lakeid", "year", "metric")
df_yearsWant %>%
ggplot(aes(year, metric, fill=filled_flag)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
all_lys = df_yearsWant %>%
select(lakeid, year) %>%
distinct()
metric = unique(df_yearsWant$metric)
all_lys_want = expand_grid(all_lys, metric)
dat_final = left_join(all_lys_want, df_yearsWant)
## Joining, by = c("lakeid", "year", "metric")
medians = dat_final %>%
group_by(lakeid, metric) %>%
summarise(median_daynum = median(daynum_fill, na.rm=T))
## `summarise()` has grouped output by 'lakeid'. You can override using the
## `.groups` argument.
dat_final = dat_final %>%
left_join(medians) %>%
mutate(daynum_fill = ifelse(is.na(daynum_fill), median_daynum, daynum_fill)) %>%
mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, filled_flag)) %>%
select(-icefill, -median_daynum)
## Joining, by = c("lakeid", "metric")
dat_final %>%
ggplot(aes(year, metric, fill=daynum_fill)) +
geom_tile(color="grey") +
theme_bw() +
facet_wrap(~lakeid, nrow=6)
write_csv(dat_final, "Data/analysis_ready/final_combined_dates_filled_v2.csv")
========================================================================================= ## Old code: no longer used
no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
s_lakes_wide = left_join(no_dups, dat) %>%
select(-sampledate) %>%
pivot_wider(names_from = metric, values_from = daynum) %>%
filter(lakeid %in% c("FI", "ME", "MO", "WI"))
## Joining, by = c("lakeid", "year", "metric")
# stepwise
hold_data = na.omit(data.frame(s_lakes_wide %>% filter(lakeid != "FI") %>% select(-chlor_spring, -chlor_fall, -daphnia_biomass, -total_zoop_biomass)))
all = lm(chlor_all~., data=hold_data)
i_o = lm(chlor_all~1, data=hold_data)
hold = step(i_o, scope=formula(all))
## Start: AIC=336.42
## chlor_all ~ 1
##
## Df Sum of Sq RSS AIC
## + lakeid 1 23765.6 147231 332.44
## + secchi_openwater_min 1 12222.0 158774 335.45
## + totpuf_epiMin 1 10680.2 160316 335.84
## + stratoff 1 9424.6 161572 336.15
## <none> 170996 336.42
## + secchi_openwater_max 1 6617.2 164379 336.84
## + straton 1 3156.5 167840 337.68
## + zoopDensity 1 2542.1 168454 337.82
## + anoxia_summer 1 1765.7 169231 338.01
## + totpuf_hypoMax 1 992.6 170004 338.19
## + doc_epiMax 1 588.4 170408 338.28
## + totpuf_epiMax 1 340.8 170656 338.34
## + totpuf_hypoMin 1 111.7 170885 338.39
## + year 1 81.3 170915 338.40
## + iceon 1 75.2 170921 338.40
## + energy 1 23.2 170973 338.42
## + stability 1 21.0 170975 338.42
## + iceoff 1 0.7 170996 338.42
##
## Step: AIC=332.44
## chlor_all ~ lakeid
##
## Df Sum of Sq RSS AIC
## + stratoff 1 9424.6 137806 331.79
## + straton 1 7685.7 139545 332.29
## <none> 147231 332.44
## + secchi_openwater_min 1 5905.7 141325 332.80
## + totpuf_epiMin 1 5006.7 142224 333.05
## + zoopDensity 1 4245.0 142986 333.26
## + totpuf_epiMax 1 3735.8 143495 333.41
## + anoxia_summer 1 3720.5 143510 333.41
## + iceon 1 2087.2 145144 333.86
## + totpuf_hypoMax 1 1502.5 145728 334.02
## + secchi_openwater_max 1 1484.8 145746 334.03
## + energy 1 572.3 146658 334.28
## + iceoff 1 380.4 146850 334.33
## + stability 1 253.0 146978 334.37
## + totpuf_hypoMin 1 232.8 146998 334.37
## + doc_epiMax 1 100.6 147130 334.41
## + year 1 81.3 147149 334.41
## - lakeid 1 23765.6 170996 336.42
##
## Step: AIC=331.79
## chlor_all ~ lakeid + stratoff
##
## Df Sum of Sq RSS AIC
## <none> 137806 331.79
## + straton 1 5781.8 132024 332.07
## - stratoff 1 9424.6 147231 332.44
## + anoxia_summer 1 3362.7 134443 332.80
## + totpuf_epiMin 1 2578.2 135228 333.03
## + stability 1 2242.3 135564 333.13
## + zoopDensity 1 2237.4 135569 333.13
## + secchi_openwater_min 1 2168.7 135637 333.15
## + secchi_openwater_max 1 1590.3 136216 333.32
## + totpuf_hypoMax 1 1498.8 136307 333.35
## + iceon 1 732.2 137074 333.58
## + energy 1 698.6 137108 333.59
## + doc_epiMax 1 352.8 137453 333.69
## + iceoff 1 320.6 137486 333.70
## + year 1 109.6 137697 333.76
## + totpuf_epiMax 1 91.9 137714 333.76
## + totpuf_hypoMin 1 0.3 137806 333.79
## - lakeid 1 23765.6 161572 336.15
chlAll_model_MEMOWI = lm(chlor_all~anoxia_summer + lakeid + iceon + stability + straton + stratoff,
data=s_lakes_wide %>% filter(lakeid != "FI"))
summary(chlAll_model_MEMOWI)
##
## Call:
## lm(formula = chlor_all ~ anoxia_summer + lakeid + iceon + stability +
## straton + stratoff, data = s_lakes_wide %>% filter(lakeid !=
## "FI"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -115.40 -44.11 0.33 41.41 122.46
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -149.4119 271.2274 -0.551 0.5841
## anoxia_summer 0.6454 0.3620 1.783 0.0806 .
## lakeid.L -32.6015 26.4849 -1.231 0.2240
## lakeid.Q -32.9677 19.6738 -1.676 0.0999 .
## iceon 0.9296 0.6700 1.388 0.1713
## stability -0.5412 0.2784 -1.944 0.0574 .
## straton 0.6513 0.3881 1.678 0.0995 .
## stratoff -0.4440 0.5099 -0.871 0.3880
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63.02 on 51 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.2751, Adjusted R-squared: 0.1756
## F-statistic: 2.765 on 7 and 51 DF, p-value: 0.01627
plot(s_lakes_wide %>% filter(lakeid != "FI") %>% pull(chlor_all),
predict(chlAll_model_MEMOWI, s_lakes_wide %>% filter(lakeid != "FI")),
col=s_lakes_wide$lakeid, pch=16, xlab="observed", ylab="predicted")
# using to fill for now
missing_chlAll_MEMOWI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid != "FI")
missing_chlAll_MEMOWI$chl_all_fill = round(predict(chlAll_model_MEMOWI, missing_chlAll_MEMOWI))
chlAll_model_FI = lm(chlor_all~(stratoff+energy+
stability+anoxia_summer+
secchi_openwater_max),
data=s_lakes_wide %>% filter(lakeid == "FI"))
summary(chlAll_model_FI)
##
## Call:
## lm(formula = chlor_all ~ (stratoff + energy + stability + anoxia_summer +
## secchi_openwater_max), data = s_lakes_wide %>% filter(lakeid ==
## "FI"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -85.14 -30.10 1.29 39.92 108.12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 874.1551 485.7957 1.799 0.0952 .
## stratoff 0.4276 1.1790 0.363 0.7227
## energy -3.0495 1.3009 -2.344 0.0356 *
## stability -0.2118 0.7354 -0.288 0.7779
## anoxia_summer -0.2712 0.6247 -0.434 0.6713
## secchi_openwater_max -0.2282 0.2481 -0.920 0.3745
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 61.41 on 13 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.3268, Adjusted R-squared: 0.06793
## F-statistic: 1.262 on 5 and 13 DF, p-value: 0.337
plot(s_lakes_wide %>% filter(lakeid == "FI") %>% pull(chlor_all),
predict(chlAll_model_FI, s_lakes_wide %>% filter(lakeid == "FI")),
col=s_lakes_wide$lakeid, pch=16)
missing_chlAll_FI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid == "FI")
missing_chlAll_FI$chl_all_fill = round(predict(chlAll_model_FI, missing_chlAll_FI))
# gives negative value; DONT fill FI
chlAll_fill = missing_chlAll_MEMOWI %>%
select(lakeid, year, chl_all_fill) %>%
rename(daynum_fill = chl_all_fill) %>%
mutate(metric = "chlor_all")